# Here, we look at the relevant data from Lahman.
# Select the identifying columns and pitching statistics from the Pitching
# dataset, then create a Net Wins column (wins minus losses).
pitchers <- tibble(Pitching) %>%
  select(playerID, yearID, teamID, IPouts, BB, SO, BAOpp, ERA, W, L) %>%
  mutate(NetWins = W - L)

# Only keep rows where there is no missing data. This has to happen before
# the normalization below: mean() and sd() return NA when any input is NA,
# which would turn every normalized value into NA.
pitchers <- pitchers[complete.cases(pitchers), ]

# Normalize data so that coefficients are meaningful: each statistic is
# centered on its mean and divided by its standard deviation. The means and
# standard deviations are computed over the complete-case rows only.
pitchers <- pitchers %>%
  mutate(
    normIPouts = (IPouts - mean(IPouts)) / sd(IPouts),
    normBB = (BB - mean(BB)) / sd(BB),
    normSO = (SO - mean(SO)) / sd(SO),
    normBAOpp = (BAOpp - mean(BAOpp)) / sd(BAOpp),
    normERA = (ERA - mean(ERA)) / sd(ERA)
  )